###############################################################################   
#### Replication Materials                                                 #### 
#### Taegyoon Kim, 2022. Violent Political Rhetoric on Twitter.            ####
#### Political Science Research and Methods                                ####
###############################################################################  


###############################################################################
################################### Set Up ####################################
###############################################################################

# packages -------------------------

# Attach every package the script depends on. library() stops with an error
# as soon as a package is missing, whereas require() only returns FALSE and
# lets the script continue until a later "could not find function" failure.
# The attach order is unchanged; invisible() suppresses the list that lapply()
# would otherwise print.
invisible(lapply(c('readr', 'xtable', 'dplyr', 'MASS', 'texreg', 'pscl'), 
                 library, 
                 character.only = TRUE))

###############################################################################
############################ Generate Tables 1 & 2 ############################
###############################################################################


# load data -------------------------

# Note: tweet text cannot be shared with third parties. The tweet text was
# lower-cased before hashtags were extracted.

# Input and output directories. Both end in a slash because file names are
# appended with paste0() throughout the script.
path_data <- '/kim_psrm_replication/data/'
path_output <- '/kim_psrm_replication/output/'

# Read the hashtag counts and drop the unnamed leading column (presumably a
# row index left by an earlier write.csv() call -- confirm against the file).
# readr < 2.0 names such a column `X1`, readr >= 2.0 names it `...1`, so a
# `cols(X1 = col_skip())` spec silently keeps the column on current readr.
# Dropping it by either name after reading works on both.
df_hashtag <- read_csv(paste0(path_data, 'df_hashtag.csv'))
df_hashtag <- df_hashtag[, !(names(df_hashtag) %in% c('X1', '...1')), drop = FALSE]

# table 1 -------------------------

df_hashtag <- data.frame(df_hashtag)

# Fail loudly instead of mislabelling columns if the file layout is not the
# expected (count, hashtag) pair.
stopifnot(ncol(df_hashtag) == 2L)
colnames(df_hashtag) <- c('Count', 'Hashtag')

# First 30 rows of the file: shown on screen, rendered as LaTeX, and saved.
df_hashtag_top30 <- head(df_hashtag, 30)
df_hashtag_top30
xtable(df_hashtag_top30)
write.csv(df_hashtag_top30, 
          paste0(path_output, 'tbl1.csv'))

# table 2 -------------------------

# Note: hashtags were extracted using the same approach as in table 1. The data
# set includes the number of hashtags as well.

# Load the weekly hashtag table from its RDS file and export it as-is to CSV.
df_hashtag_weekly <- paste0(path_data, 'df_hashtag_weekly.rds') %>%
  readRDS()
write.csv(x = df_hashtag_weekly, file = paste0(path_output, 'tbl2.csv'))


###############################################################################
#################### Generate Tables 3 & 4 & A7 & A8 & A9 ##################### 
###############################################################################

# load data -------------------------

# Read the mention data and copy the `office` column to `position`, the name
# the rest of the script works with.
df_mention <- paste0(path_data, 'df_mention.csv') %>%
  read_csv() %>%
  mutate(position = office)

# table 3 -------------------------

# Mean mention count by office, highest first. Rows whose position is
# 'collective' are excluded. Office codes are replaced by display labels only
# after sorting; codes without a label become NA.
mention_mean_position <- df_mention %>%
  filter(position != 'collective') %>%
  group_by(position) %>%
  summarise(mention_count = mean(handle_include_count)) %>%
  arrange(desc(mention_count)) %>%
  mutate(position = case_when(
    position == 'p' ~ 'Trump',
    position == 'vp' ~ 'Pence',
    position == 'p_cand' ~ 'Biden',
    position == 'vp_cand' ~ 'Harris',
    position == 'senator' ~ 'Senators',
    position == 'representative' ~ 'Representatives',
    position == 'governor' ~ 'Governors'
  )) %>%
  rename(type = position)

# Mean mention count for Republicans vs. everyone else, highest first. Rows
# with position 'collective' or 'p' are excluded.
mention_mean_party <- df_mention %>%
  filter(position != 'collective', position != 'p') %>%
  mutate(party_binary = ifelse(party == 'R', 'Republican', 'Non-Republican')) %>%
  group_by(party_binary) %>%
  summarise(mention_count = mean(handle_include_count)) %>%
  arrange(desc(mention_count)) %>%
  rename(type = party_binary)

# Mean mention count by gender on the same rows, sorted by the raw gender
# code before the codes are turned into labels.
mention_mean_gender <- df_mention %>%
  filter(position != 'collective', position != 'p') %>%
  group_by(gender) %>%
  summarise(mention_count = mean(handle_include_count)) %>%
  arrange(gender) %>%
  mutate(gender = case_when(
    gender == 'F' ~ 'Women',
    gender == 'M' ~ 'Men'
  )) %>%
  rename(type = gender)

# Stack the three summaries (all share the columns `type`, `mention_count`),
# print the LaTeX version, and save the CSV.
mention_mean_entire <- rbind(
  mention_mean_position,
  mention_mean_party,
  mention_mean_gender
)

xtable(mention_mean_entire)
write.csv(x = mention_mean_entire, file = paste0(path_output, 'tbl3.csv'))

# table 4 -------------------------

# Drop rows whose position is 'collective' for everything that follows.
df_mention <- df_mention %>%
  filter(position != 'collective')

# Replace the office codes with display labels; unmatched codes become NA.
df_mention$position <- case_when(
  df_mention$position == 'p' ~ 'Trump',
  df_mention$position == 'vp' ~ 'Pence',
  df_mention$position == 'p_cand' ~ 'Biden',
  df_mention$position == 'vp_cand' ~ 'Harris',
  df_mention$position == 'senator' ~ 'Senators',
  df_mention$position == 'representative' ~ 'Representatives',
  df_mention$position == 'governor' ~ 'Governors'
  )

# Covariates for the count models.
df_mention$republican <- ifelse(df_mention$party == 'R', 1, 0)
# NOTE(review): `democrat` is 1 for every party other than 'R', so it is
# really a non-Republican indicator; no model in this part of the script
# uses it.
df_mention$democrat <- ifelse(df_mention$party != 'R', 1, 0)
df_mention$female <- ifelse(df_mention$gender == 'F', 1, 0)
df_mention$followers_count_log <- log10(df_mention$followers_count)

# All labelled positions except 'Trump' (rows with NA position are dropped).
# 'Representatives' is the reference category.
df_mention_cands <- df_mention[which(
  df_mention$position %in% c('Representatives', 'Governors', 'Biden',
                             'Pence', 'Harris', 'Senators')), ]
df_mention_cands$position <- as.factor(df_mention_cands$position)
df_mention_cands$position <- relevel(df_mention_cands$position, ref = 'Representatives')

# Main estimation sample: representatives, governors and senators only.
df_mention_subset <- df_mention_cands[which(
  df_mention_cands$position %in% c('Representatives', 'Governors', 'Senators')), ]
# `position` is already a factor here, so as.factor() would be a no-op and
# the now-empty levels (Biden, Harris, Pence) would survive and appear as
# zero-count rows in any table() of this column. droplevels() removes them;
# the model fits should be unaffected because model frames drop unused levels.
df_mention_subset$position <- droplevels(df_mention_subset$position)
df_mention_subset$position <- relevel(df_mention_subset$position, ref = 'Representatives')


# Negative binomial models of the mention count: one per covariate, then all
# covariates together.
nb_position <- glm.nb(handle_include_count ~ position, 
                    data = df_mention_subset)
nb_female <- glm.nb(handle_include_count ~ female, 
                    data = df_mention_subset)
nb_republican <- glm.nb(handle_include_count ~ republican, 
                        data = df_mention_subset)
nb_follower <- glm.nb(handle_include_count ~ followers_count_log, 
                      data = df_mention_subset)
nb_all <- glm.nb(handle_include_count ~ position + republican  + female + followers_count_log, 
                 data = df_mention_subset)

regression_main <- texreg(list(nb_position, nb_female, nb_republican, nb_follower, nb_all))
# quote = FALSE: write.table() quotes character data by default, which would
# wrap the LaTeX source in double quotes inside the .tex file.
write.table(regression_main, 
            file = paste0(
              path_output, 
              'tbl4.tex'), 
            quote = FALSE,
            row.names = FALSE, 
            col.names = FALSE)

# table a7 -------------------------

# Descriptive statistics for df_mention_subset: summary() of the mention and
# follower counts, and frequency tables for gender, party and position.
# NOTE(review): the two summary frames pass an unnamed expression to
# data.frame(), so the CSV column header is presumably derived from that
# expression text -- rewording these calls would change the header; confirm
# before refactoring.
tabla7_mention <- data.frame(unclass(summary(df_mention_subset$handle_include_count)))
tabla7_follower <- data.frame(unclass(summary(df_mention_subset$followers_count)))
# table() reports every level of a factor, including levels with no rows.
tabla7_gender <- data.frame(table(df_mention_subset$gender))
tabla7_party <- data.frame(table(df_mention_subset$party))
tabla7_position <- data.frame(table(df_mention_subset$position))

# One CSV per statistic, written to the output directory.
write.csv(tabla7_mention, paste0(path_output, 'tbla7_mention.csv'))
write.csv(tabla7_follower, paste0(path_output, 'tbla7_follower.csv'))
write.csv(tabla7_gender, paste0(path_output, 'tbla7_gender.csv'))
write.csv(tabla7_party, paste0(path_output, 'tbla7_party.csv'))
write.csv(tabla7_position, paste0(path_output, 'tbla7_position.csv'))

# table a8 -------------------------

# Full negative binomial specification re-estimated on df_mention_cands, the
# wider sample that also contains Biden, Pence and Harris.
nb_all_cand <- glm.nb(handle_include_count ~ position + female + republican + followers_count_log, 
                      data = df_mention_cands)

regression_cands <- texreg(nb_all_cand)
# quote = FALSE: write.table() quotes character data by default, which would
# wrap the LaTeX source in double quotes inside the .tex file.
write.table(regression_cands, 
            file = paste0(path_output, 'tbla8.tex'), 
            quote = FALSE,
            row.names = FALSE, 
            col.names = FALSE)

# table a9 -------------------------

# Zero-inflated negative binomial model on df_mention_subset. The formula part
# before `|` specifies the count component (negative binomial, because
# dist = 'negbin'); the part after `|` specifies the zero-inflation component.
zi_all <- zeroinfl(handle_include_count ~ position + female + republican + followers_count_log | # predictors for the count (negative binomial) component
                     position + female + republican + followers_count_log, # predictors for the zero-inflation component
                   dist = 'negbin',
                   data = df_mention_subset)
regression_zeroinfl <- texreg(zi_all)
# quote = FALSE: write.table() quotes character data by default, which would
# wrap the LaTeX source in double quotes inside the .tex file.
write.table(regression_zeroinfl, 
            file = paste0(path_output, 'tbla9.tex'), 
            quote = FALSE,
            row.names = FALSE, 
            col.names = FALSE)